# encoding=utf-8
import jieba

def word_segment(path: str) -> None:
    """Segment a UTF-8 text file with jieba and write it to ``path + ".seg"``.

    Each input line is tokenized with ``jieba.cut(line, cut_all=False)``.
    Tokens whose length is 1 or less are discarded; the remaining tokens are
    joined with single spaces and written as one output line per input line.
    The zero-based line index is printed every 1000 lines (starting with 0)
    as a progress indicator.

    Args:
        path: Path of the input file, read as UTF-8. The output is written
            as UTF-8 to the same path with ``.seg`` appended; the file is
            opened in ``"w"`` mode, so existing content is replaced.
    """
    # ``with`` closes both handles even if reading, segmentation or writing
    # raises partway through; manual close() calls would be skipped on error.
    with open(path, "r", encoding="utf-8") as src, \
            open(path + ".seg", "w", encoding="utf-8") as dst:
        for index, line in enumerate(src):
            tokens = jieba.cut(line, cut_all=False)
            # Keep only tokens longer than one character; the line
            # terminator is re-added explicitly after joining.
            kept = [token for token in tokens if len(token) > 1]
            dst.write(' '.join(kept) + "\n")
            if index % 1000 == 0:
                print(index)


if __name__ == "__main__":
    # Run the segmentation only when this file is executed as a script, so
    # importing the module does not trigger file I/O on a hard-coded,
    # machine-specific path.
    word_segment("C:/Users/ffftzh/Desktop/语料/proc_caita.txt")

